Library import
library(SpatialExperiment)
library(data.table)
library(scater) # it imports scuttle
library(cowplot)
library(ggplot2)
theme_set(theme_bw())
library(matrixStats)
library(dplyr)
library(tidyr)
library(tibble)
library(arrow)
library(scales)
library(sf)
library(tmap)
Read-in function
readCosmxSPE <- function(dirname = dirname,
countmatfpattern = "exprMat_file.csv",
metadatafpattern = "metadata_file.csv",
coord_names = c("CenterX_global_px",
"CenterY_global_px")){
countmat_file <- file.path(dirname, list.files(dirname, countmatfpattern))
metadata_file <- file.path(dirname, list.files(dirname, metadatafpattern))
# Read in
countmat <- data.table::fread(countmat_file)
metadata <- data.table::fread(metadata_file)
# Count matrix
counts <- merge(countmat, metadata[, c("fov", "cell_ID")])
counts <- subset(counts, select = -c(cell, fov, cell_ID))
cell_codes <- rownames(counts)
features <- colnames(counts)
counts <- t(counts)
rownames(counts) <- features
colnames(counts) <- cell_codes
# rowData (does not exist)
# colData
colData <- merge(metadata, countmat[, c("fov", "cell_ID")])
spe <- SpatialExperiment::SpatialExperiment(
assays = list(counts = counts),
# rowData = rowData,
colData = colData,
spatialCoordsNames = coord_names
)
return(spe)
}
Setting directories
dirname <- "/NAS06/work/Spatial_omics/CosMx/Human cortex"
count_pattern <- "S3_exprMat_file.csv.gz"
meta_pattern <- "S3_metadata_file.csv.gz"
sample_name <- "CosMx_Human_Cortex"
SpatialExperiment object creation and exploration
spe <- readCosmxSPE(dirname = dirname, countmatfpattern = count_pattern, metadatafpattern = meta_pattern)
spe <- scuttle::addPerCellQC(spe, subsets=list(NegProb = grep("^NegPrb", rownames(spe))))
names(colData(spe)@listData)
## [1] "cell_ID"
## [2] "fov"
## [3] "cell"
## [4] "nCount_RNA"
## [5] "nFeature_RNA"
## [6] "nCount_negprobes"
## [7] "nFeature_negprobes"
## [8] "nCount_falsecode"
## [9] "nFeature_falsecode"
## [10] "Area"
## [11] "AspectRatio"
## [12] "Width"
## [13] "Height"
## [14] "Mean.Histone"
## [15] "Max.Histone"
## [16] "Mean.G"
## [17] "Max.G"
## [18] "Mean.rRNA"
## [19] "Max.rRNA"
## [20] "Mean.GFAP"
## [21] "Max.GFAP"
## [22] "Mean.DAPI"
## [23] "Max.DAPI"
## [24] "cell_id"
## [25] "assay_type"
## [26] "dualfiles"
## [27] "Run_name"
## [28] "ISH.concentration"
## [29] "Dash"
## [30] "tissue"
## [31] "slide_ID"
## [32] "Run_Tissue_name"
## [33] "Panel"
## [34] "assay_type.1"
## [35] "version"
## [36] "Area.um2"
## [37] "CenterX_local_px"
## [38] "CenterY_local_px"
## [39] "propNegative"
## [40] "complexity"
## [41] "errorCtEstimate"
## [42] "percOfDataFromError"
## [43] "qcFlagsRNACounts"
## [44] "qcFlagsCellCounts"
## [45] "qcFlagsCellPropNeg"
## [46] "qcFlagsCellComplex"
## [47] "qcFlagsCellArea"
## [48] "qcCellsFlagged"
## [49] "median_negprobes"
## [50] "negprobes_quantile_0.9"
## [51] "median_RNA"
## [52] "RNA_quantile_0.9"
## [53] "nCell"
## [54] "nCount"
## [55] "nCountPerCell"
## [56] "nFeaturePerCell"
## [57] "propNegativeCellAvg"
## [58] "complexityCellAvg"
## [59] "errorCtPerCellEstimate"
## [60] "percOfDataFromErrorPerCell"
## [61] "qcFlagsFOV"
## [62] "RefinedClusts_Final"
## [63] "prob"
## [64] "RNA_spatialClusteringNeighbours_astro.1"
## [65] "RNA_spatialClusteringNeighbours_astro.2"
## [66] "RNA_spatialClusteringNeighbours_endothelial"
## [67] "RNA_spatialClusteringNeighbours_Inh.1"
## [68] "RNA_spatialClusteringNeighbours_Inh.2"
## [69] "RNA_spatialClusteringNeighbours_Inh.3"
## [70] "RNA_spatialClusteringNeighbours_L2_3"
## [71] "RNA_spatialClusteringNeighbours_L4"
## [72] "RNA_spatialClusteringNeighbours_L6"
## [73] "RNA_spatialClusteringNeighbours_microglia.1"
## [74] "RNA_spatialClusteringNeighbours_microglia.2"
## [75] "RNA_spatialClusteringNeighbours_oligodendrocyte"
## [76] "RNA_spatialClusteringNeighbours_OPC"
## [77] "RNA_spatialClusteringNeighbours_unknown"
## [78] "spatialClusteringAssignments"
## [79] "sample_id"
## [80] "sum"
## [81] "detected"
## [82] "subsets_NegProb_sum"
## [83] "subsets_NegProb_detected"
## [84] "subsets_NegProb_percent"
## [85] "total"
dim(spe)
## [1] 6622 188686
Selecting only numeric and integer variables from metadata then only variables of interest from the list
temp <- unlist(lapply(spe@colData@listData, function(x) is.numeric(x) | is.integer(x)))
num_variables <- c()
for (i in 1:length(temp)){
num_variables[i] <- ifelse(temp[i] == TRUE, names(temp[i]), NA)
num_variables <- num_variables[!is.na(num_variables)]
}
num_variables
## [1] "cell_ID"
## [2] "fov"
## [3] "nCount_RNA"
## [4] "nFeature_RNA"
## [5] "nCount_negprobes"
## [6] "nFeature_negprobes"
## [7] "nCount_falsecode"
## [8] "nFeature_falsecode"
## [9] "Area"
## [10] "AspectRatio"
## [11] "Width"
## [12] "Height"
## [13] "Mean.Histone"
## [14] "Max.Histone"
## [15] "Mean.G"
## [16] "Max.G"
## [17] "Mean.rRNA"
## [18] "Max.rRNA"
## [19] "Mean.GFAP"
## [20] "Max.GFAP"
## [21] "Mean.DAPI"
## [22] "Max.DAPI"
## [23] "slide_ID"
## [24] "Area.um2"
## [25] "CenterX_local_px"
## [26] "CenterY_local_px"
## [27] "propNegative"
## [28] "complexity"
## [29] "errorCtEstimate"
## [30] "percOfDataFromError"
## [31] "median_negprobes"
## [32] "negprobes_quantile_0.9"
## [33] "median_RNA"
## [34] "RNA_quantile_0.9"
## [35] "nCell"
## [36] "nCount"
## [37] "nCountPerCell"
## [38] "nFeaturePerCell"
## [39] "propNegativeCellAvg"
## [40] "complexityCellAvg"
## [41] "errorCtPerCellEstimate"
## [42] "percOfDataFromErrorPerCell"
## [43] "prob"
## [44] "RNA_spatialClusteringNeighbours_astro.1"
## [45] "RNA_spatialClusteringNeighbours_astro.2"
## [46] "RNA_spatialClusteringNeighbours_endothelial"
## [47] "RNA_spatialClusteringNeighbours_Inh.1"
## [48] "RNA_spatialClusteringNeighbours_Inh.2"
## [49] "RNA_spatialClusteringNeighbours_Inh.3"
## [50] "RNA_spatialClusteringNeighbours_L2_3"
## [51] "RNA_spatialClusteringNeighbours_L4"
## [52] "RNA_spatialClusteringNeighbours_L6"
## [53] "RNA_spatialClusteringNeighbours_microglia.1"
## [54] "RNA_spatialClusteringNeighbours_microglia.2"
## [55] "RNA_spatialClusteringNeighbours_oligodendrocyte"
## [56] "RNA_spatialClusteringNeighbours_OPC"
## [57] "RNA_spatialClusteringNeighbours_unknown"
## [58] "sum"
## [59] "detected"
## [60] "subsets_NegProb_sum"
## [61] "subsets_NegProb_detected"
## [62] "subsets_NegProb_percent"
## [63] "total"
variables <- c("total", "detected")
Approaching dataset preprocessing starting from global-scale, proceeding to local-scale, then fov-focused exploratory analysis
Sample map with fov grid
fov_pattern <- "S3_fov_positions_file.csv.gz"
metadata_file <- file.path(dirname, list.files(dirname, meta_pattern))
fovpos_file <- file.path(dirname, list.files(dirname, fov_pattern))
metadata <- data.table::fread(metadata_file)
fov_positions <- data.table::fread(fovpos_file, header = T)
fov_positions <- fov_positions[fov_positions$FOV%in%unique(metadata$fov),]
fov_positions <- fov_positions |> mutate(CenterX_global_px = X_mm*25000/3, CenterY_global_px = Y_mm*25000/3)
center_x <- max(fov_positions$CenterX_global_px)/2
center_y <- max(fov_positions$CenterY_global_px)/2
fov_positions <- fov_positions |> mutate(CenterX_global_px = -(CenterX_global_px - center_x) + center_x, CenterY_global_px = -(CenterY_global_px - center_y) + center_y)
# image theme
my_image_theme <- function(back.color="black",back.border=NA,title.col="white") {
theme(panel.border = element_blank(),
legend.key = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_blank(),
panel.grid = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.title = element_text(color = title.col,hjust = 0.5,face = "bold"),
plot.background = element_rect(fill = "transparent",colour = back.border))
}
fov_xdim <- 4000
fov_ydim <- 4000
ggplot() +
geom_point(data = metadata, mapping = aes(x = CenterX_global_px, y = CenterY_global_px), colour = "darkmagenta", fill="darkmagenta", size = 0.05, alpha = 0.2) +
annotate("rect", xmin = fov_positions$CenterX_global_px,
xmax = fov_positions$CenterX_global_px + fov_xdim,
ymin = fov_positions$CenterY_global_px,
ymax = fov_positions$CenterY_global_px - fov_ydim,
alpha = .2, color = "black",linewidth = 0.2) +
geom_text(aes(x = fov_positions$CenterX_global_px + fov_xdim/2,
y = fov_positions$CenterY_global_px + fov_ydim/2 - fov_ydim,
label = fov_positions$FOV), color="black", fontface = "bold") +
ggtitle(sample_name) +
my_image_theme(back.color = "white", back.border = "white", title.col = "black")
Numeric/integer variable dependence on global position
Importing global coordinates from spatialCoords slot since they are not available in colData
colData(spe)$CenterX_global_px <- spatialCoords(spe)[,1]
colData(spe)$CenterY_global_px <- spatialCoords(spe)[,2]
Global coordinate scatterplot colored by variables showing spatial pattern
Global coordinate scatterplot colored by variables showing spatial pattern
spe$CenterX_global_um <- spe$CenterX_global_px*0.18
spe$CenterY_global_um <- spe$CenterY_global_px*0.18
spe$logtot <- log10(spe$total)
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "total", point_size = 0.1) + ggtitle("Total counts per cell")
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "logtot", point_size = 0.1) + ggtitle("Log-total counts per cell")
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "detected", point_size = 0.1) + ggtitle("Detected features per cell")
spe$um_area <- spe$Area*0.0324
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "um_area", point_size = 0.1) + ggtitle("Area in μm per cell")
spe$log_um_area <- log10(spe$um_area)
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "log_um_area", point_size = 0.1) + ggtitle("Log_um_area per cell")
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "AspectRatio", point_size = 0.1) + ggtitle("Width/Height ratio per cell")
spe$target_counts <- spe$total - spe$subsets_NegProb_sum
spe$target_detected <- spe$detected - spe$subsets_NegProb_detected
spe$tot_det_ratio <- spe$target_counts/spe$target_detected
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "tot_det_ratio", point_size = 0.1) + ggtitle("Target counts/detected features ratio per cell")
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "subsets_NegProb_sum", point_size = 0.1) + ggtitle("Negative control probe counts per cell")